@@ -78,6 +78,14 @@ AWS_ACCESS_KEY="your aws access key" |
||
| 78 | 78 |
# Set AWS_SANDBOX to true if you're developing Huginn code. |
| 79 | 79 |
AWS_SANDBOX=false |
| 80 | 80 |
|
| 81 |
+######################## |
|
| 82 |
+# Various Settings # |
|
| 83 |
+######################## |
|
| 84 |
+ |
|
| 85 |
+# Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)] |
|
| 86 |
+# You should not allow this on a shared Huginn box because it is not secure. |
|
| 87 |
+ALLOW_JSONPATH_EVAL=false |
|
| 88 |
+ |
|
| 81 | 89 |
# Use Graphviz for generating diagrams instead of using Google Chart |
| 82 | 90 |
# Tools. Specify a dot(1) command path built with SVG support |
| 83 | 91 |
# enabled. |
@@ -10,7 +10,7 @@ gem 'bootstrap-kaminari-views', '~> 0.0.2' |
||
| 10 | 10 |
gem 'rufus-scheduler', '~> 3.0.7', require: false |
| 11 | 11 |
gem 'json', '~> 1.8.1' |
| 12 | 12 |
gem 'jsonpath', '~> 0.5.3' |
| 13 |
-gem 'twilio-ruby', '~> 3.10.0' |
|
| 13 |
+gem 'twilio-ruby', '~> 3.11.5' |
|
| 14 | 14 |
gem 'ruby-growl', '~> 4.1.0' |
| 15 | 15 |
|
| 16 | 16 |
gem 'delayed_job', '~> 4.0.0' |
@@ -40,7 +40,7 @@ gem 'nokogiri', '~> 1.6.1' |
||
| 40 | 40 |
|
| 41 | 41 |
gem 'wunderground', '~> 1.2.0' |
| 42 | 42 |
gem 'forecast_io', '~> 2.0.0' |
| 43 |
-gem 'rturk', '~> 2.11.0' |
|
| 43 |
+gem 'rturk', '~> 2.12.1' |
|
| 44 | 44 |
|
| 45 | 45 |
gem 'twitter', '~> 5.7.1' |
| 46 | 46 |
gem 'twitter-stream', github: 'cantino/twitter-stream', branch: 'master' |
@@ -215,7 +215,7 @@ GEM |
||
| 215 | 215 |
rspec-core (~> 2.14.0) |
| 216 | 216 |
rspec-expectations (~> 2.14.0) |
| 217 | 217 |
rspec-mocks (~> 2.14.0) |
| 218 |
- rturk (2.11.3) |
|
| 218 |
+ rturk (2.12.1) |
|
| 219 | 219 |
erector |
| 220 | 220 |
nokogiri |
| 221 | 221 |
rest-client |
@@ -263,7 +263,7 @@ GEM |
||
| 263 | 263 |
treetop (1.4.15) |
| 264 | 264 |
polyglot |
| 265 | 265 |
polyglot (>= 0.3.1) |
| 266 |
- twilio-ruby (3.10.1) |
|
| 266 |
+ twilio-ruby (3.11.5) |
|
| 267 | 267 |
builder (>= 2.1.2) |
| 268 | 268 |
jwt (>= 0.1.2) |
| 269 | 269 |
multi_json (>= 1.3.0) |
@@ -289,7 +289,7 @@ GEM |
||
| 289 | 289 |
macaddr (~> 1.0) |
| 290 | 290 |
warden (1.2.3) |
| 291 | 291 |
rack (>= 1.0) |
| 292 |
- webmock (1.13.0) |
|
| 292 |
+ webmock (1.17.4) |
|
| 293 | 293 |
addressable (>= 2.2.7) |
| 294 | 294 |
crack (>= 0.3.2) |
| 295 | 295 |
weibo_2 (0.1.6) |
@@ -337,14 +337,14 @@ DEPENDENCIES |
||
| 337 | 337 |
rr |
| 338 | 338 |
rspec |
| 339 | 339 |
rspec-rails |
| 340 |
- rturk (~> 2.11.0) |
|
| 340 |
+ rturk (~> 2.12.1) |
|
| 341 | 341 |
ruby-growl (~> 4.1.0) |
| 342 | 342 |
rufus-scheduler (~> 3.0.7) |
| 343 | 343 |
sass-rails (~> 4.0.0) |
| 344 | 344 |
select2-rails (~> 3.5.4) |
| 345 | 345 |
shoulda-matchers |
| 346 | 346 |
therubyracer (~> 0.12.1) |
| 347 |
- twilio-ruby (~> 3.10.0) |
|
| 347 |
+ twilio-ruby (~> 3.11.5) |
|
| 348 | 348 |
twitter (~> 5.7.1) |
| 349 | 349 |
twitter-stream! |
| 350 | 350 |
typhoeus (~> 0.6.3) |
@@ -24,7 +24,7 @@ Follow [@tectonic](https://twitter.com/tectonic) for updates as Huginn evolves, |
||
| 24 | 24 |
|
| 25 | 25 |
### We need your help! |
| 26 | 26 |
|
| 27 |
-Want to help with Huginn? Try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). |
|
| 27 |
+Want to help with Huginn? All contributions are encouraged! You could make UI improvements, add new Agents, write documentation and tutorials, or try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). |
|
| 28 | 28 |
|
| 29 | 29 |
## Examples |
| 30 | 30 |
|
@@ -7,17 +7,16 @@ module Agents |
||
| 7 | 7 |
cannot_create_events! |
| 8 | 8 |
|
| 9 | 9 |
description <<-MD |
| 10 |
- The TwilioAgent receives and collects events and sends them via text message or gives you a call when scheduled. |
|
| 10 |
+ The TwilioAgent receives and collects events and sends them via text message (up to 160 characters) or gives you a call when scheduled. |
|
| 11 | 11 |
|
| 12 |
- It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use Event Formatting Agent if your event does not provide these keys. |
|
| 12 |
+ It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use the EventFormattingAgent if your event does not provide these keys. |
|
| 13 | 13 |
|
| 14 | 14 |
Set `receiver_cell` to the number to receive text messages/call and `sender_cell` to the number sending them. |
| 15 | 15 |
|
| 16 | 16 |
`expected_receive_period_in_days` is maximum number of days that you would expect to pass between events being received by this agent. |
| 17 | 17 |
|
| 18 |
- If you would like to receive calls, then set `receive_call` to true. `server_url` needs to be |
|
| 19 |
- filled only if you are making calls. Dont forget to include http/https in `server_url`. |
|
| 20 |
- |
|
| 18 |
+ If you would like to receive calls, set `receive_call` to `true`. In this case, `server_url` must be set to the URL of your |
|
| 19 |
+ Huginn installation (probably "https://#{ENV['DOMAIN']}"), which must be web-accessible. Be sure to set http/https correctly.
|
|
| 21 | 20 |
MD |
| 22 | 21 |
|
| 23 | 22 |
def default_options |
@@ -43,13 +42,14 @@ module Agents |
||
| 43 | 42 |
@client = Twilio::REST::Client.new options['account_sid'], options['auth_token'] |
| 44 | 43 |
memory['pending_calls'] ||= {}
|
| 45 | 44 |
incoming_events.each do |event| |
| 46 |
- message = (event.payload['message'] || event.payload['text'] || event.payload['sms']).to_s |
|
| 47 |
- if message != "" |
|
| 45 |
+ message = (event.payload['message'].presence || event.payload['text'].presence || event.payload['sms'].presence).to_s |
|
| 46 |
+ if message.present? |
|
| 48 | 47 |
if options['receive_call'].to_s == 'true' |
| 49 | 48 |
secret = SecureRandom.hex 3 |
| 50 | 49 |
memory['pending_calls'][secret] = message |
| 51 | 50 |
make_call secret |
| 52 | 51 |
end |
| 52 |
+ |
|
| 53 | 53 |
if options['receive_text'].to_s == 'true' |
| 54 | 54 |
message = message.slice 0..160 |
| 55 | 55 |
send_message message |
@@ -71,11 +71,11 @@ module Agents |
||
| 71 | 71 |
def make_call(secret) |
| 72 | 72 |
@client.account.calls.create :from => options['sender_cell'], |
| 73 | 73 |
:to => options['receiver_cell'], |
| 74 |
- :url => post_url(options['server_url'],secret) |
|
| 74 |
+ :url => post_url(options['server_url'], secret) |
|
| 75 | 75 |
end |
| 76 | 76 |
|
| 77 |
- def post_url(server_url,secret) |
|
| 78 |
- "#{server_url}/users/#{self.user.id}/web_requests/#{self.id}/#{secret}"
|
|
| 77 |
+ def post_url(server_url, secret) |
|
| 78 |
+ "#{server_url}/users/#{user.id}/web_requests/#{id}/#{secret}"
|
|
| 79 | 79 |
end |
| 80 | 80 |
|
| 81 | 81 |
def receive_web_request(params, method, format) |
@@ -16,6 +16,8 @@ module Agents |
||
| 16 | 16 |
|
| 17 | 17 |
Specify a `url` and select a `mode` for when to create Events based on the scraped data, either `all` or `on_change`. |
| 18 | 18 |
|
| 19 |
+ `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape) |
|
| 20 |
+ |
|
| 19 | 21 |
The `type` value can be `xml`, `html`, or `json`. |
| 20 | 22 |
|
| 21 | 23 |
To tell the Agent how to parse the content, specify `extract` as a hash with keys naming the extractions and values of hashes. |
@@ -107,85 +109,97 @@ module Agents |
||
| 107 | 109 |
log "Fetching #{options['url']}"
|
| 108 | 110 |
request_opts = { :followlocation => true }
|
| 109 | 111 |
request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present? |
| 110 |
- request = Typhoeus::Request.new(options['url'], request_opts) |
|
| 111 | 112 |
|
| 112 |
- request.on_failure do |response| |
|
| 113 |
- error "Failed: #{response.inspect}"
|
|
| 113 |
+ requests = [] |
|
| 114 |
+ |
|
| 115 |
+ if options['url'].kind_of?(Array) |
|
| 116 |
+ options['url'].each do |url| |
|
| 117 |
+ requests.push(Typhoeus::Request.new(url, request_opts)) |
|
| 118 |
+ end |
|
| 119 |
+ else |
|
| 120 |
+ requests.push(Typhoeus::Request.new(options['url'], request_opts)) |
|
| 114 | 121 |
end |
| 115 | 122 |
|
| 116 |
- request.on_success do |response| |
|
| 117 |
- body = response.body |
|
| 118 |
- if (encoding = options['force_encoding']).present? |
|
| 119 |
- body = body.encode(Encoding::UTF_8, encoding) |
|
| 123 |
+ requests.each do |request| |
|
| 124 |
+ request.on_failure do |response| |
|
| 125 |
+ error "Failed: #{response.inspect}"
|
|
| 120 | 126 |
end |
| 121 |
- doc = parse(body) |
|
| 122 | 127 |
|
| 123 |
- if extract_full_json? |
|
| 124 |
- if store_payload!(previous_payloads(1), doc) |
|
| 125 |
- log "Storing new result for '#{name}': #{doc.inspect}"
|
|
| 126 |
- create_event :payload => doc |
|
| 128 |
+ request.on_success do |response| |
|
| 129 |
+ body = response.body |
|
| 130 |
+ if (encoding = options['force_encoding']).present? |
|
| 131 |
+ body = body.encode(Encoding::UTF_8, encoding) |
|
| 127 | 132 |
end |
| 128 |
- else |
|
| 129 |
- output = {}
|
|
| 130 |
- options['extract'].each do |name, extraction_details| |
|
| 131 |
- if extraction_type == "json" |
|
| 132 |
- result = Utils.values_at(doc, extraction_details['path']) |
|
| 133 |
- log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}"
|
|
| 134 |
- else |
|
| 135 |
- case |
|
| 136 |
- when css = extraction_details['css'] |
|
| 137 |
- nodes = doc.css(css) |
|
| 138 |
- when xpath = extraction_details['xpath'] |
|
| 139 |
- nodes = doc.xpath(xpath) |
|
| 133 |
+ doc = parse(body) |
|
| 134 |
+ |
|
| 135 |
+ if extract_full_json? |
|
| 136 |
+ if store_payload!(previous_payloads(1), doc) |
|
| 137 |
+ log "Storing new result for '#{name}': #{doc.inspect}"
|
|
| 138 |
+ create_event :payload => doc |
|
| 139 |
+ end |
|
| 140 |
+ else |
|
| 141 |
+ output = {}
|
|
| 142 |
+ options['extract'].each do |name, extraction_details| |
|
| 143 |
+ if extraction_type == "json" |
|
| 144 |
+ result = Utils.values_at(doc, extraction_details['path']) |
|
| 145 |
+ log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}"
|
|
| 140 | 146 |
else |
| 141 |
- error "'css' or 'xpath' is required for HTML or XML extraction" |
|
| 142 |
- return |
|
| 143 |
- end |
|
| 144 |
- unless Nokogiri::XML::NodeSet === nodes |
|
| 145 |
- error "The result of HTML/XML extraction was not a NodeSet" |
|
| 146 |
- return |
|
| 147 |
- end |
|
| 148 |
- result = nodes.map { |node|
|
|
| 149 |
- if extraction_details['attr'] |
|
| 150 |
- node.attr(extraction_details['attr']) |
|
| 151 |
- elsif extraction_details['text'] |
|
| 152 |
- node.text() |
|
| 147 |
+ case |
|
| 148 |
+ when css = extraction_details['css'] |
|
| 149 |
+ nodes = doc.css(css) |
|
| 150 |
+ when xpath = extraction_details['xpath'] |
|
| 151 |
+ nodes = doc.xpath(xpath) |
|
| 153 | 152 |
else |
| 154 |
- error "'attr' or 'text' is required on HTML or XML extraction patterns" |
|
| 153 |
+ error "'css' or 'xpath' is required for HTML or XML extraction" |
|
| 155 | 154 |
return |
| 156 | 155 |
end |
| 157 |
- } |
|
| 158 |
- log "Extracting #{extraction_type} at #{xpath || css}: #{result}"
|
|
| 156 |
+ unless Nokogiri::XML::NodeSet === nodes |
|
| 157 |
+ error "The result of HTML/XML extraction was not a NodeSet" |
|
| 158 |
+ return |
|
| 159 |
+ end |
|
| 160 |
+ result = nodes.map { |node|
|
|
| 161 |
+ if extraction_details['attr'] |
|
| 162 |
+ node.attr(extraction_details['attr']) |
|
| 163 |
+ elsif extraction_details['text'] |
|
| 164 |
+ node.text() |
|
| 165 |
+ else |
|
| 166 |
+ error "'attr' or 'text' is required on HTML or XML extraction patterns" |
|
| 167 |
+ return |
|
| 168 |
+ end |
|
| 169 |
+ } |
|
| 170 |
+ log "Extracting #{extraction_type} at #{xpath || css}: #{result}"
|
|
| 171 |
+ end |
|
| 172 |
+ output[name] = result |
|
| 159 | 173 |
end |
| 160 |
- output[name] = result |
|
| 161 |
- end |
|
| 162 | 174 |
|
| 163 |
- num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq
|
|
| 175 |
+ num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq
|
|
| 164 | 176 |
|
| 165 |
- if num_unique_lengths.length != 1 |
|
| 166 |
- error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}"
|
|
| 167 |
- return |
|
| 168 |
- end |
|
| 169 |
- |
|
| 170 |
- old_events = previous_payloads num_unique_lengths.first |
|
| 171 |
- num_unique_lengths.first.times do |index| |
|
| 172 |
- result = {}
|
|
| 173 |
- options['extract'].keys.each do |name| |
|
| 174 |
- result[name] = output[name][index] |
|
| 175 |
- if name.to_s == 'url' |
|
| 176 |
- result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? |
|
| 177 |
- end |
|
| 177 |
+ if num_unique_lengths.length != 1 |
|
| 178 |
+ error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}"
|
|
| 179 |
+ return |
|
| 178 | 180 |
end |
| 181 |
+ |
|
| 182 |
+ old_events = previous_payloads num_unique_lengths.first |
|
| 183 |
+ num_unique_lengths.first.times do |index| |
|
| 184 |
+ result = {}
|
|
| 185 |
+ options['extract'].keys.each do |name| |
|
| 186 |
+ result[name] = output[name][index] |
|
| 187 |
+ if name.to_s == 'url' |
|
| 188 |
+ result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? |
|
| 189 |
+ end |
|
| 190 |
+ end |
|
| 179 | 191 |
|
| 180 |
- if store_payload!(old_events, result) |
|
| 181 |
- log "Storing new parsed result for '#{name}': #{result.inspect}"
|
|
| 182 |
- create_event :payload => result |
|
| 192 |
+ if store_payload!(old_events, result) |
|
| 193 |
+ log "Storing new parsed result for '#{name}': #{result.inspect}"
|
|
| 194 |
+ create_event :payload => result |
|
| 195 |
+ end |
|
| 183 | 196 |
end |
| 184 | 197 |
end |
| 185 | 198 |
end |
| 199 |
+ |
|
| 200 |
+ hydra.queue request |
|
| 201 |
+ hydra.run |
|
| 186 | 202 |
end |
| 187 |
- hydra.queue request |
|
| 188 |
- hydra.run |
|
| 189 | 203 |
end |
| 190 | 204 |
|
| 191 | 205 |
private |
@@ -16,7 +16,7 @@ group "huginn" do |
||
| 16 | 16 |
action :create |
| 17 | 17 |
end |
| 18 | 18 |
|
| 19 |
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl").each do |pkg|
|
|
| 19 |
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libmysqlclient-dev").each do |pkg|
|
|
| 20 | 20 |
package pkg do |
| 21 | 21 |
action :install |
| 22 | 22 |
end |
@@ -49,9 +49,9 @@ bash "huginn dependencies" do |
||
| 49 | 49 |
export LC_ALL="en_US.UTF-8" |
| 50 | 50 |
sudo bundle install |
| 51 | 51 |
sed s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env.example > .env |
| 52 |
- sudo rake db:create |
|
| 53 |
- sudo rake db:migrate |
|
| 54 |
- sudo rake db:seed |
|
| 52 |
+ sudo bundle exec rake db:create |
|
| 53 |
+ sudo bundle exec rake db:migrate |
|
| 54 |
+ sudo bundle exec rake db:seed |
|
| 55 | 55 |
EOH |
| 56 | 56 |
end |
| 57 | 57 |
|
@@ -14,7 +14,7 @@ group "huginn" do |
||
| 14 | 14 |
members ["huginn"] |
| 15 | 15 |
end |
| 16 | 16 |
|
| 17 |
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8").each do |pkg|
|
|
| 17 |
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8" "libmysqlclient-dev").each do |pkg|
|
|
| 18 | 18 |
package("#{pkg}")
|
| 19 | 19 |
end |
| 20 | 20 |
|
@@ -84,9 +84,9 @@ deploy "/home/huginn" do |
||
| 84 | 84 |
sudo cp /home/huginn/shared/config/nginx.conf /etc/nginx/ |
| 85 | 85 |
sudo bundle install |
| 86 | 86 |
sed -i s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env |
| 87 |
- sudo rake db:create |
|
| 88 |
- sudo rake db:migrate |
|
| 89 |
- sudo rake db:seed |
|
| 87 |
+ sudo bundle exec rake db:create |
|
| 88 |
+ sudo bundle exec rake db:migrate |
|
| 89 |
+ sudo bundle exec rake db:seed |
|
| 90 | 90 |
sudo foreman export upstart /etc/init -a huginn -u huginn -l log |
| 91 | 91 |
sudo start huginn |
| 92 | 92 |
EOH |
@@ -56,7 +56,7 @@ module Utils |
||
| 56 | 56 |
escape = false |
| 57 | 57 |
end |
| 58 | 58 |
|
| 59 |
- result = JsonPath.new(path, :allow_eval => false).on(data.is_a?(String) ? data : data.to_json) |
|
| 59 |
+ result = JsonPath.new(path, :allow_eval => ENV['ALLOW_JSONPATH_EVAL'] == "true").on(data.is_a?(String) ? data : data.to_json) |
|
| 60 | 60 |
if escape |
| 61 | 61 |
result.map {|r| CGI::escape r }
|
| 62 | 62 |
else |
@@ -79,4 +79,4 @@ module Utils |
||
| 79 | 79 |
def self.pretty_jsonify(thing) |
| 80 | 80 |
JSON.pretty_generate(thing).gsub('</', '<\/')
|
| 81 | 81 |
end |
| 82 |
-end |
|
| 82 |
+end |
@@ -91,6 +91,30 @@ describe Agents::WebsiteAgent do |
||
| 91 | 91 |
@checker.check |
| 92 | 92 |
@checker.logs.first.message.should =~ /Got an uneven number of matches/ |
| 93 | 93 |
end |
| 94 |
+ |
|
| 95 |
+ it "should accept an array for url" do |
|
| 96 |
+ @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"] |
|
| 97 |
+ @checker.options = @site |
|
| 98 |
+ lambda { @checker.save! }.should_not raise_error;
|
|
| 99 |
+ lambda { @checker.check }.should_not raise_error;
|
|
| 100 |
+ end |
|
| 101 |
+ |
|
| 102 |
+ it "should parse events from all urls in array" do |
|
| 103 |
+ lambda {
|
|
| 104 |
+ @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] |
|
| 105 |
+ @site['mode'] = 'all' |
|
| 106 |
+ @checker.options = @site |
|
| 107 |
+ @checker.check |
|
| 108 |
+ }.should change { Event.count }.by(2)
|
|
| 109 |
+ end |
|
| 110 |
+ |
|
| 111 |
+ it "should follow unique rules when parsing array of urls" do |
|
| 112 |
+ lambda {
|
|
| 113 |
+ @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] |
|
| 114 |
+ @checker.options = @site |
|
| 115 |
+ @checker.check |
|
| 116 |
+ }.should change { Event.count }.by(1)
|
|
| 117 |
+ end |
|
| 94 | 118 |
end |
| 95 | 119 |
|
| 96 | 120 |
describe 'encoding' do |